import html
import json
import os
import re
from time import sleep

import requests
from bs4 import BeautifulSoup

"""
    4/24
    去掉html
    去掉思维链，标记是<details>，对于聊天助手够用了，后面要做成可选
    
"""

# http://10.10.93.198/v1
API_URL = "http://10.10.93.198:8090/v1/chat-messages"
API_KEY = "app-SRoouEPrCLS0UW1uuwEThKhG"  # 替换成你的真实 API 密钥
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

# 测试集
test_queries = [
    "What are the specs of the iPhone 13 Pro Max?",
    "Explain the concept of quantum entanglement.",
    "Who won the World Cup in 2022?"
]

results = []

for i, query in enumerate(test_queries):
    payload = {
        "inputs": {},
        "query": query,
        "response_mode": "blocking",
        "conversation_id": "",
        "user": "abc-123",
        "files": []
    }

    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=90)
        response.raise_for_status()
        data = response.json()
        answer = data.get("answer", "")

        # 还需html解析，还需去掉思维链

        # 1. 去除思维链： < details > 标签块（可选）
        cleaned = re.sub(r"<details.*?</details>", "", answer, flags=re.DOTALL)
        # 2. 去除所有 HTML 标签
        cleaned = re.sub(r"<[^>]+>", "", cleaned)
        # 3. 将转义字符还原（如 \n, \", &nbsp; 等）
        cleaned_answer = html.unescape(cleaned)

    except Exception as e:
        cleaned_answer = f"[Error] {e}"

    results.append({
        "query": query,
        "answer": cleaned_answer
    })

    print(f"[{i+1}/{len(test_queries)}] Done.")
    sleep(1)  # 可选：避免请求过快
    break

# 保存为 JSON
with open("data/聊天助手_answers.json", "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print("所有查询已完成，答案已保存到 聊天助手_answers.json")
